# Author: Mark Richardson
# Purpose: Wrangle 2020 skills ratings for passing to JAGS

# Load packages
library(dplyr)
library(stringr)

# Load the 2020 survey data; the .RData file is expected to provide `sfgs`
load("data/sfgs_2020.RData")

# Keep only rows with responded == 1 (rows where `responded` is NA are dropped)
sfgs <- filter(sfgs, responded == 1)

# Format the data: keep the workplace identifiers, the three works-with
# selections and the five rated-agency / skills-rating pairs, then recode -99
# to NA in each of the five skills ratings
skills <- sfgs %>%
  select(
    dept, office,
    works_with_1:works_with_3,
    agency_rated_1:agency_rated_5,
    skills_rating_1:skills_rating_5
  ) %>%
  mutate(
    across(
      # Name the five columns explicitly so nothing else is recoded
      all_of(paste0("skills_rating_", 1:5)),
      ~ if_else(.x == -99, NA_real_, .x)
    )
  )

### Get subset of informed ratings ###

# A rating in slot k is kept as "informed" when the respondent's k-th
# works-with selection contains the name of the k-th agency they rated.
# fixed() makes the agency name match as a literal substring instead of being
# interpreted as a regular expression, so characters such as "(", ")" or "|"
# in an agency name cannot change or break the match.
# Rows where either value is NA are dropped by filter().
# Each result has the columns works_with, agency_rated and skills_rating.

ww_1 <- skills %>%
  filter(str_detect(works_with_1, fixed(agency_rated_1))) %>%
  select(
    works_with = works_with_1,
    agency_rated = agency_rated_1,
    skills_rating = skills_rating_1
  )

ww_2 <- skills %>%
  filter(str_detect(works_with_2, fixed(agency_rated_2))) %>%
  select(
    works_with = works_with_2,
    agency_rated = agency_rated_2,
    skills_rating = skills_rating_2
  )

ww_3 <- skills %>%
  filter(str_detect(works_with_3, fixed(agency_rated_3))) %>%
  select(
    works_with = works_with_3,
    agency_rated = agency_rated_3,
    skills_rating = skills_rating_3
  )

# Check for other matches
# Note: if respondent skips the first works-with and answers the second works-with,
# then first rating will be second works-with.

# `other` holds every row where a works-with selection in slot i contains the
# agency rated in a *different* slot j (all six i != j combinations).
other <- local({
  # TRUE when `works_with` contains `agency_rated` as a literal substring
  # (fixed() stops the agency name being read as a regular expression) and
  # none of the four exclusions applies. The exclusions guard against false
  # substring hits such as "Management" inside
  # "Office of Management and Budget".
  # NOTE(review): as written, the exclusions drop every pair whose works-with
  # is OMB and every pair whose rated agency is "Management" (likewise for the
  # science pair), not only the OMB/"Management" combination -- confirm this
  # is intended.
  # NOTE(review): "Office Science and Technology Policy" has no "of" after
  # "Office" -- confirm it matches the label used in the data.
  is_cross_match <- function(works_with, agency_rated) {
    str_detect(works_with, fixed(agency_rated)) &
      works_with != "Office of Management and Budget" &
      agency_rated != "Management" &
      works_with != "Office Science and Technology Policy" &
      agency_rated != "Office of Science"
  }

  skills %>%
    filter(
      is_cross_match(works_with_1, agency_rated_2) |
        is_cross_match(works_with_1, agency_rated_3) |
        is_cross_match(works_with_2, agency_rated_1) |
        is_cross_match(works_with_2, agency_rated_3) |
        is_cross_match(works_with_3, agency_rated_1) |
        is_cross_match(works_with_3, agency_rated_2)
    )
})

# Some respondents selected the same agency multiple times

# Split `other` by which works-with selections are identical: all three equal,
# or exactly one pair equal. The repeated value must not be the "-99" code.

# ww_1 = ww_2 = ww_3
other_123 <- other %>%
  filter(
    works_with_1 == works_with_2,
    works_with_2 == works_with_3,
    works_with_1 != "-99"
  )

# ww_1 = ww_2 != ww_3
other_12 <- other %>%
  filter(
    works_with_1 == works_with_2,
    works_with_2 != works_with_3,
    works_with_1 != "-99"
  )

# ww_1 != ww_2 = ww_3
other_23 <- other %>%
  filter(
    works_with_1 != works_with_2,
    works_with_2 == works_with_3,
    works_with_2 != "-99"
  )

# ww_1 = ww_3 != ww_2
other_13 <- other %>%
  filter(
    works_with_1 == works_with_3,
    works_with_1 != works_with_2,
    works_with_1 != "-99"
  )

# All rows that fit one of the four duplicate patterns
other_known <- bind_rows(other_123, other_12, other_23, other_13)

# Number of fully duplicated rows in other_known
sum(duplicated(other_known)) # No duplicates

# Rows of `other` that fit none of the four patterns
other_unknown <- setdiff(other, other_known) # One case with -99 for ww_1 and ww_3 and informed rating for DOI - agency_rated_1

# Get informed ratings from duplicated works-withs

# For each duplicate pattern, keep the first of the repeated works-with
# selections together with the rated agency and skills rating from that same
# slot, using the common column names works_with / agency_rated / skills_rating.
# NOTE(review): if works_with_k also contains agency_rated_k, the row already
# passes the direct ww_k filter, so the same rating may enter skills_inf twice
# -- confirm against the data.

dup_123 <- other_123 %>%
  select(
    works_with = works_with_1,
    agency_rated = agency_rated_1,
    skills_rating = skills_rating_1
  )

dup_12 <- other_12 %>%
  select(
    works_with = works_with_1,
    agency_rated = agency_rated_1,
    skills_rating = skills_rating_1
  )

dup_23 <- other_23 %>%
  select(
    works_with = works_with_2,
    agency_rated = agency_rated_2,
    skills_rating = skills_rating_2
  )

dup_13 <- other_13 %>%
  select(
    works_with = works_with_1,
    agency_rated = agency_rated_1,
    skills_rating = skills_rating_1
  )

# The pairing below (works_with_2 rated in slot 1) was worked out for the one
# leftover row, where works_with_1 and works_with_3 are -99 and agency_rated_1
# is the informed rating. It is applied to every row of other_unknown, so warn
# if the data no longer contain exactly that one row.
if (nrow(other_unknown) != 1L) {
  warning(
    "Expected exactly 1 row in other_unknown but found ",
    nrow(other_unknown),
    "; the works_with_2 / agency_rated_1 pairing may not apply.",
    call. = FALSE
  )
}

other_unknown_inf <- other_unknown %>%
  select(
    works_with = works_with_2,
    agency_rated = agency_rated_1,
    skills_rating = skills_rating_1
  )
  

# Stack every set of informed ratings into one long table
skills_inf <- list(
  ww_1, ww_2, ww_3,
  dup_123, dup_12, dup_23, dup_13,
  other_unknown_inf
) %>%
  bind_rows()

# Clean up the intermediate objects created above (ww_*, other, other_*, dup_*).
# The pattern is anchored to the start of the name so that unrelated workspace
# objects whose names merely contain "dup", "other" or "ww" (e.g. "another_df")
# are not deleted.
rm(list = ls(pattern = "^(dup|other|ww)(_|$)"))

#### Get informed priors ####

# Per rated agency: number of non-missing informed ratings, their mean and
# their sample variance (var() is NA for an agency with a single rating)
skills_inf_priors <- skills_inf %>%
  filter(!is.na(skills_rating)) %>%
  group_by(agency_rated) %>%
  summarise(
    n = n(),
    skills_inf_mean = mean(skills_rating),
    skills_inf_var = var(skills_rating)
  )

#### Get ratings and workplace ####

# Workplace identifiers plus every column from agency_rated_1 through
# skills_rating_5 (the works-with columns are left out)
skills_ratings <- select(skills, dept, office, agency_rated_1:skills_rating_5)

#### Save the ratings and informed priors ####

# Write both objects to a single .RData file
save(
  list = c("skills_ratings", "skills_inf_priors"),
  file = "data/ratings/skills_ratings/skills_ratings_for_model.RData"
)